rm(list = ls())
library(ggplot2)
library(data.table)
library(foreign)
library(plyr)
library(extrafont)
setwd("")

source("code/function-createdemograph.R")

#Make t0/t1 phone vs. online demographics graph
#Pull in data
t1.t0.phone_mail.data <- read.dta('data/t1_t0_phone_mail_anon.dta')
#All other tranches are online only

science.df <- matrix(NA, nrow=8, ncol=8)

science.df[,1] <- c(rep("sci_waves_correct_t2",2), rep("sci_ocean_correct_t2",2), rep("sci_waterboil_correct_t2",2), rep("sci_poliovaccine_correct_t2",2))

science.df[,2] <- c(rep("% Radio Waves Correct",2), rep("% Ocean Tides Correct",2), rep("% Water Boil Correct",2), rep("% Jonas Salk Correct",2))

science.df[,3] <- rep(c("2014 Pew Panel", "Online Respondents"),2)

science.df[,8] <- rep(c("brown4", "darkorchid4"), 2)

#Hard Code Correct Answers from Pew: http://www.pewinternet.org/files/2015/09/2015-09-10_science-knowledge_FINAL.pdf
for (i in 1:nrow(science.df)) {
  #margin of error
  if (science.df[i,3] == "2014 Pew Panel") science.df[i,5] <- 0.023
  #means
  if (science.df[i,3] == "2014 Pew Panel" & science.df[i,1] == "sci_waves_correct_t2") science.df[i,4] <- 0.72
  if (science.df[i,3] == "2014 Pew Panel" & science.df[i,1] == "sci_ocean_correct_t2") science.df[i,4] <- 0.76
  if (science.df[i,3] == "2014 Pew Panel" & science.df[i,1] == "sci_waterboil_correct_t2") science.df[i,4] <- 0.34
  if (science.df[i,3] == "2014 Pew Panel" & science.df[i,1] == "sci_poliovaccine_correct_t2") science.df[i,4] <- 0.74
}

#Add in online data
for (i in 1:nrow(science.df)) {
  #Calculate the means
  if (science.df[i,3] == "Online Respondents") science.df[i,4] <- wt.mean(t1.t0.phone_mail.data[which(!is.na(t1.t0.phone_mail.data$online_survey_t1)),science.df[i,1]],t1.t0.phone_mail.data$pweight[which(!is.na(t1.t0.phone_mail.data$online_survey_t1))])
  #Calculate the SEM
  if (science.df[i,3] == "Online Respondents") science.df[i,5] <- wt.sd(t1.t0.phone_mail.data[which(!is.na(t1.t0.phone_mail.data$online_survey_t1)),science.df[i,1]],t1.t0.phone_mail.data$pweight[which(!is.na(t1.t0.phone_mail.data$online_survey_t1))])/sqrt(length(t1.t0.phone_mail.data[which(!is.na(t1.t0.phone_mail.data$online_survey_t1)),science.df[i,1]])) 
}


#Clean data frame
science.df[,6] <- as.numeric(science.df[,4]) - as.numeric(science.df[,5])
science.df[,7] <- as.numeric(science.df[,4]) + as.numeric(science.df[,5])
science.df <- as.data.frame(science.df)
science.df <- rename(science.df, c("V1" = "variable", "V2" = "string", "V3" = "stage", "V4" = "mean", 
                                   "V5" = "sem", "V6" = "lower", "V7" = "upper", "V8" = "stagecolor"))
science.df$mean <- as.numeric(levels(science.df$mean)[science.df$mean])
science.df$sem <- as.numeric(levels(science.df$sem)[science.df$sem])
science.df$lower <- as.numeric(levels(science.df$lower)[science.df$lower])
science.df$upper <- as.numeric(levels(science.df$upper)[science.df$upper])
science.df$stagecolor <- as.character(science.df$stagecolor)

# plot
# dot plot
p <- ggplot(science.df, aes(x = mean, xmin = lower, xmax = upper, y = stage, colour = stagecolor)) +
  geom_point() +
  geom_errorbarh(aes(height = .5)) +
  facet_wrap(~ string, ncol = 2) +
  scale_colour_manual(values=science.df$stagecolor) + 
  scale_x_continuous(limits = c(0,1), breaks = seq(0, 1, .25), 
                     labels = c(0, "", .5, "", 1)) + 
  labs(x = "Proportion of Characteristic", y = NULL) + 
  theme_bw() +
  theme(panel.grid.major.y = element_line(colour = "grey60"), 
        panel.grid.major.x = element_blank(),
        panel.grid.minor = element_blank(),
        panel.margin = unit(0, "lines"),
        strip.background = element_rect(fill = "grey80"),
        axis.ticks.y = element_blank(), 
        axis.text.y = element_text(color=c("brown4", "darkorchid4")),
        legend.position="none", text = element_text(family = 'Verdana')) +
  geom_vline(xintercept = 0)
p
#Figure OA1: Scientific Knowledge in 2014 Pew and Our Mail-to-Online Surveys
ggsave("output/sciknowrepfig.tiff", 
       p, width = 6, height = 8)

